		/* 	REPLICATION SCRIPT */
		/* Dec 13 2025 */


	* ------------------------------------------------------------------------------
	* PART 1: DESCRIPTIVE STATISTICS AND REGRESSION TABLES
	* Project: School Discipline Disparities Increase when Neighborhood Black Pop Changes
	* Authors: Candipan & Hailey (Science Advances)
	* Dec 2025
	* ------------------------------------------------------------------------------
	* PURPOSE: 
	* This script reproduces all descriptive- and regression-based tables 
	* from the main text (Table 1) and appendix (Tables S1-S7)
	*
	* INPUT DATA:  $data\sa_analysis_final.dta
	* OUTPUT:      $project\tables
	* ------------------------------------------------------------------------------
	* ------------------------------------------------------------------------------
	* INSTRUCTIONS:
	* 1. This runs as a standalone script provided users have pre-defined their global macros. 
	* 2. Ensure all global macros are set.
	* 3. Set Stata to version 17 or higher. Toggle on, if necessary (ln 34)
	* 4. Ensure that reghdfe is version 5 and up and all user-written packages are installed.
	* 5. Run the script.
	* ------------------------------------------------------------------------------


		//clear all
		set more off

	* VERSION REQUIREMENTS:
	
	* Set Stata version 	
	version 19 // Stata code successfully runs with versions 17-19 (Dec 2025)
 
	
	/********************************************************************	
	 Required user-written commands

	 This do-file uses several community-contributed Stata commands that
	 are NOT part of official Stata:

	   - reghdfe (must use version 5 or 6)
	   - ppmlhdfe
	   - estout (for eststo/esttab)
	   - coefplot

	 Please install these packages before running the code.	
	********************************************************************/
			
		
	*------------------------------------------------------------------------------	
		* SAFETY CHECKS: 
		*	Ensure the Main Master Script has been run, subfolders are set, 
		*		and analysis file is in the data folder. 
			** IMPORTANT: Mac users may need to change forward slashes (/) 
			* 		to back slashes (\) in the code below
	*------------------------------------------------------------------------------
	
	* 1. Check if the global macro to the project root folder is defined 			
		if "$project" == "" {
			display as error "Error: You must run main.do to define file paths first."
			exit
		}
		
	* 2. Check if the tables subfolder has been created		
		if "$tables" == "" {
			display as error "Error: You must make sure you have created a tables subfolder for this code to run."
			exit
		}		
		
	* 3. Check if the analysis file exists
			capture confirm file "$data/sa_analysis_final.dta"

			if _rc != 0 {
				display as error ///
				"Error: The file 'sa_analysis_final.dta' was not found in the folder: $data"
				display as error ///
				"Please check that you have unzipped the data correctly and that your data folder contains the correct files."
				exit
			}	
			 
	* 4. Confirm which version of reghdfe is installed 
	which reghdfe // version 6.12.3
	
		/*NOTE! 
			The reghdfe syntax works for versions 5 or later. */
			
			
	
	* ------------------------------------------------------------------------------
	* SETUP
	* ------------------------------------------------------------------------------
		
		* Set directory
		cd "$project"
		
		*Import analysis file
		use "$data\sa_analysis_final.dta", clear
		
			sort idschool year 


		*Label variables 
		capture labelvariables
		

			des 

	
	
		
		*--------------------------------------------*
		*----------		 Final Tables 	-------------*
		*--------------------------------------------*
		

		
		*********************************************************
		/* Begin Descriptive Analysis */
		*********************************************************
		
		capture log close 
		log using "$tables\Table1_SummaryStatistics_log.txt", text replace 
		
		//==============================================================
		// Table 1: Summary statistics of analysis sample 
		//==============================================================
		/* Correction: The number of neighborhood types is off by n=4*/
		
		*---------------------------*
		* Means and SDs  (top panel)
		*---------------------------*
		quietly ///
		table (typeb var) (year result)  if samp==1 , ///
			statistic(mean $meanvars) 	///	 
			statistic(sd  $meanvars) 	///	 				 
			nformat(%9.3f) ///
			title("Means and SDs")
			
			collect label levels result mean "Mean" sd "SD", modify
			collect preview
			
			* Export the top panel of table 
			collect export "$tables\Table1_SummaryStatistics.txt", replace
			collect clear 
		
		*---------------------------*
		* Proportions	(bottom panel)
		*---------------------------*
		quietly ///
		table typeb year  if samp==1 , ///
			statistic(proportion $propvars) ///	 
			nformat(%9.3f) ///
			title("Proportions")
			
			collect label levels result proportion "Proportion", modify
			collect preview	
			
			* Export the bottom panel of table 
			collect export "$tables\Table1_SummaryStatistics.txt", append
			collect clear
			
		*-------------------------------------------*	
		* Number of observations per neighborhood type
		*-------------------------------------------*
		quietly ///
		table typeb year  if samp==1 , statistic(count id) ///
			title("number of observations (by neighborhood type)")
		
			collect label levels result count "N", modify
			collect preview	
			
			* Export the n's per neighborhood type 
			collect export "$tables\Table1_SummaryStatistics.txt", append 		
			collect clear
			
			capture log close 
			

		//==============================================================
		// Appendix Table S1: OSS suspension rate levels and disparities 
		//		(overall and by neighborhood type)
		//==============================================================
		
		* School discipline: OSS rates  (top panel)
		table typeb year  if samp==1 , ///
			statistic(mean  oss_rate_bl oss_rate_wh ) 	///	 
			statistic(sd  oss_rate_bl oss_rate_wh ) 	///	 			
			nformat(%9.3f) ///
			title("School discipline: OSS rates  (top panel)")
	
			collect label levels result mean "Mean" sd "SD", modify
			collect preview	
			collect export "$tables\Table_S1_OSSRates_Disparities.txt", replace 
			collect clear
			
		* School discipline disparities	(bottom panel)
		table typeb year  if samp==1 , ///
			statistic(mean diff_oss_blwh2 irr_oss_blwh2) ///	 
			statistic(sd diff_oss_blwh2 irr_oss_blwh2) ///				
			 nformat(%9.3f) ///
			 title("School discipline disparities (bottom panel)")
	
			collect label levels result mean "Mean" sd "SD", modify
			collect preview	
			collect export "$tables\Table_S1_OSSRates_Disparities.txt", append 
			collect clear 
			
		* Number of observations per neighborhood type
		table typeb year  if samp==1 , statistic(count id) ///
			title("number of observations (by neighborhood type)")
		
			collect label levels result count "N", modify
			collect preview	
			
			* Export the n's per neighborhood type 
			collect export "$tables\Table_S1_OSSRates_Disparities.txt", append 		
			collect clear
			

			

		
		************************************************************
		/* Begin Regression Analysis */
		************************************************************
					
		xtset idschooln year 
		

    //==============================================================
    // Appendix Table S2. OSS Rate Difference Fixed Effects Model 
	//		(All neighborhoods)
    //==============================================================
	
		quietly ///				
		eststo s2m1: ///
		reghdfe diff_oss_blwh2 ///
			$neighvars 	///
			$schvars 	///
			i.year##(i.typeb) if $conditions , ///
			$regopts	

			
		* Output Appendix Table S2: Baseline Model
		esttab s2m1 ///
		using "$tables\Table_S2_BaselineRateFEmodel.txt", ///
			$tabopts drop( $omitref $neighvars *pblack* *phisp* *pfrpl* it_enroll2) ///
			mtitle title("Table S2: Baseline Model")
	
		

    //==============================================================
    // Appendix Table S3. OSS Rate Difference Fixed Effects Model 
	//		(by Initial Neighborhood White Composition)
    //==============================================================
	
		quietly ///	
		eststo s3m2: ///
		reghdfe diff_oss_blwh2 $neighvars  ///
			$schvars  ///
			i.year##(i.typeb ) if  $conditions  & predom==0  , ///
			$regopts
			
		quietly ///
		eststo s3m3: ///
		reghdfe diff_oss_blwh2 $neighvars  ///
			$schvars   ///
			i.year##(i.typeb ) if  $conditions  & predom==1  , ///
			$regopts
			
		quietly ///		
		eststo s3m4: ///
		reghdfe diff_oss_blwh2 $neighvars  ///
			$schvars  ///
			i.year##(i.typeb ) if  $conditions  & majnhw==1  , ///
			$regopts

		quietly ///			
		eststo s3m5: ///
		reghdfe diff_oss_blwh2 $neighvars  ///
			$schvars  ///
			i.year##(i.typeb ) if  $conditions  & majnhw2==1  , ///
			$regopts
			 
		* Output Appendix Table S3: Predominantly vs. Non-Predominantly White
		esttab s3m2 s3m3 s3m4 s3m5   ///
		using "$tables\Table_S3_PredomWhiteFEmodel.txt", ///
			$tabopts ///
			drop($omitref $neighvars *pblack* *phisp* *pfrpl* it_enroll2) ///
			mtitle title("Table S3: Heterogeneity by Initial Neighborhood White Composition") ///
			note("m2: non-predominantly white; m3: predominantly white (70%); m4: majority white (50%); m5: majority white (60%)")
			

		
    //==============================================================
    // Appendix Table S4. OSS Rate Difference Fixed Effects Model 
	//		By locale (igeotype: 0 = urban, 1 = suburban, 2 = rural)
    //==============================================================
	
		foreach x in urban suburb rural locale {
		capture drop `x'
		}
		
		tab igeotype, gen(locale)
		rename locale1 urban 
		rename locale2 suburb 
		rename locale3 rural 
		
		foreach x in urban suburb rural {
		quietly ///			
		eststo s4`x': ///
		reghdfe diff_oss_blwh2 $neighvars  ///
		$schvars ///
		i.year##(i.typeb ) if  $conditions  & `x'==1  , ///
				$regopts
		}
		
		
		*Output Appendix Table S4: Locale
		esttab s4urban s4suburb s4rural  ///
		using "$tables\Table_S4_LocaleRateFEmodel.txt", /// 
		$tabopts ///
			drop(  $omitref $neighvars *pblack* *phisp* *pfrpl* it_enroll2) ///
			mtitle title("Table S4: Rate Differences (by Locale)")	///
			note("m6: urban; m7: suburb/town; m8: rural")
			
			

		
    //==============================================================
    // Appendix Table S5. Poisson Regression OSS Rate Ratio Models
    //==============================================================
	
	
		capture drop m1
		quietly ///		
		eststo s5main: ///
		ppmlhdfe irr_oss_blwh2 $neighvars  ///
		$schvars  ///
		i.year##(i.typeb ) if  $conditions   , ///
			$regopts nocons eform d(m1)
		
		capture drop m1
		quietly ///		
		eststo s5nonpredom: ///
		ppmlhdfe irr_oss_blwh2 $neighvars  ///
		$schvars  ///
		i.year##(i.typeb ) if  $conditions & predom==0   , ///
			$regopts nocons eform d(m1)
		
		capture drop m1
		quietly ///		
		eststo s5predom: ///
		ppmlhdfe irr_oss_blwh2 $neighvars  ///
		$schvars  ///
		i.year##(i.typeb ) if  $conditions & predom==1   , ///
			$regopts nocons eform d(m1)
			
		foreach x in urban suburb rural {
		capture drop m1
		quietly ///		
		eststo s5`x': ///
		ppmlhdfe irr_oss_blwh2 $neighvars  ///
		$schvars  ///
		i.year##(i.typeb ) if  $conditions & `x'==1  , ///
			$regopts nocons eform d(m1)
		}	
			
		*Output Appendix Table S5: Poisson FE Rate Ratio Models
		esttab s5main s5nonpredom s5predom s5urban s5suburb s5rural  ///
		using "$tables\Table_S5_IRR_PoissonFE.txt", ///
		$tabopts2 ///
			drop(  $omitref2 $neighvars *pblack* *phisp* *pfrpl* it_enroll2) ///
			mtitle title("Table S5: Poission Fixed Effects Rate Ratios Models")	eform ///
			note("model output displays incident rate ratios")
						

		
    //==============================================================
    // Appendix Table S6. Sensitivity Check: Enrollment Filter
	//	Rate Difference Fixed Effects Models with 20 Black and White students		
    //==============================================================	
		
		
    * all neighborhoods
		estimates clear

		quietly ///		
		eststo s6main_max: ///
		reghdfe diff_oss_blwh2 ///
			$neighvars ///
			$schvars ///
			i.year##i.typeb ///
			if $conditions & $max , ///
			$regopts


    * by predom White
     foreach p in 0 1 {		
		quietly ///	 	
		eststo s6predom`p'_max: ///
		reghdfe diff_oss_blwh2 ///
			$neighvars ///
			$schvars ///
			i.year##i.typeb ///
			if $conditions  & predom==`p' & $max , ///
			$regopts
		}

    * by locale
    foreach p in 0 1 2 {
		quietly ///		
        eststo s6locale`p'_max: ///
		reghdfe diff_oss_blwh2 ///
            $neighvars ///
            $schvars ///
            i.year##i.typeb ///
            if $conditions  & igeotype==`p' & $max , ///
            $regopts
	}
   
   
 		*Output Appendix Table S6: Sensivity Analysis (Enrollment Filter)  
		esttab 	s6main_max s6predom0_max s6predom1_max ///
				s6locale0_max s6locale1_max s6locale2_max  ///
		using "$tables\Table_S6_Sensitivity_Enrollment.txt", ///
			$tabopts  ///
			drop( $omitref $neighvars *pblack* *phisp* *pfrpl* it_enroll2) ///
			mtitle title("Appendix Table S6: Sensitivity Check - Rate Difference Models") ///
			note("includes enrollment filter for 20 Black and White students in either year")
			
		
		
    //==============================================================
    // Appendix Table S7. Sensitivity Check: Enrollment Filter
	//	Rate Difference Fixed Effects Models 
	//	(by p.p. Change in Neighborhood Black Population)
    //==============================================================	
			

		* create indicators for neighborhoods with 10% increase/decrease in pct neigh Black  
			capture drop black_10 
			capture drop ngh_black_10
			gen black_10 = 0 if year == 2018
			replace black_10 = 1 if sgr_pnhb >= .10  & year == 2018
			replace black_10 = 2 if sgr_pnhb <= -.10 & year == 2018
			bysort idschool: egen ngh_black_10 = max(black_10)
			
			
		* All neighborhoods
		quietly ///		
		eststo s7main_10: ///
		reghdfe diff_oss_blwh2 ///
			$neighvars ///
			$schvars ///
			i.year##i.ngh_black_10 ///
			if $conditions, ///
			$regopts

		
		* By predom White
		foreach var in predom {
		foreach p in 0 1 {
			local v : label (`var') `p'
			quietly ///	
			eststo s7`var'`p'_10: ///
			reghdfe diff_oss_blwh2 ///
				$neighvars ///
				$schvars ///
				i.year##i.ngh_black_10 ///
				if $conditions & `var' == `p', ///
				$regopts		
			}
		}
		
		
		* By locale
		foreach p in 0 1 2 {
		local v : label (igeotype) `p'
		quietly ///		
		eststo s7locale`p'_10: ///
		reghdfe diff_oss_blwh2 ///
			$neighvars ///
			$schvars ///
			i.year##i.ngh_black_10 ///
			if $conditions & igeotype == `p', ///
			$regopts
		}

		*Output Appendix Table S7: Sensivity Analysis (Percentage-based Neigh Change)  		
		esttab s7main_10 s7predom0_10 s7predom1_10 s7locale0_10 s7locale1_10 s7locale2_10 ///
		using "$tables\Table_S7_Sensitivity_PctChange.txt", ///
			$tabopts ///
			drop( $neighvars *pblack* *phisp* *pfrpl* ///
			it_enroll2 0.ngh_black_10 2010.year 2010.year#0.ngh_black_10) ///
			mtitle title("Appendix Table S7: Sensitivity Check - Rate Difference Models") ///
			note("Neighborhood type by 10 percent change in % Black; locale 0=urban; locale 1=suburb/town' locale 2=rural")
				